import re

# Input HTML file the script scans when run directly.
INPUT_PATH = "cleaned_demo.html"

# Base URL that extracted path segments are appended to.
MP4_BASE_URL = "https://mp4.vjshi.com"

# [1] Captures the src of an <img> that follows a
# <div class="video-card__puzzle...> opening tag.  `[\s\S]*?` already spans
# newlines, so no re.DOTALL flag is needed.  Note that `[^>]*src=` also
# accepts attributes whose name merely ends in "src" (e.g. data-src).
CARD_IMG_SRC_PATTERN = re.compile(
    r'<div class="video-card__puzzle[^>]*>[\s\S]*?<img [^>]*src="([^"]+)"')

# [2] Captures a segment such as
# '2024-10-12/413383d398144f96acb78bc08e243a0d' from an image URL: a
# YYYY-MM-DD directory, then at least 32 lowercase hex characters, followed
# by '/', '.' or '_'.  Compiled once here instead of once per loop iteration.
PATH_SEGMENT_PATTERN = re.compile(
    r'/(\d{4}-\d{2}-\d{2}/[a-f0-9]{32,})(/|\.|_)')


def extract_image_sources(html):
    """Return every image src captured by CARD_IMG_SRC_PATTERN in *html*.

    The result is a list of strings in document order; it is empty when
    nothing matches.
    """
    return CARD_IMG_SRC_PATTERN.findall(html)


def build_mp4_url(src):
    """Return the mp4 URL derived from image URL *src*, or None.

    None is returned when *src* contains no date/hex path segment
    recognised by PATH_SEGMENT_PATTERN.
    """
    found = PATH_SEGMENT_PATTERN.search(src)
    if found is None:
        return None
    # [3] Join the base URL, the captured segment and the .mp4 extension.
    return f"{MP4_BASE_URL}/{found.group(1)}.mp4"


def extract_mp4_urls(html):
    """Return the mp4 URLs for all matching card images in *html*.

    Sources without a recognisable path segment are skipped.  Order follows
    the document and duplicates are kept.
    """
    candidates = (build_mp4_url(src) for src in extract_image_sources(html))
    return [url for url in candidates if url is not None]


def main():
    """Read INPUT_PATH and print one derived mp4 URL per line."""
    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        html = f.read()

    # Print the results.
    for url in extract_mp4_urls(html):
        print(url)


if __name__ == "__main__":
    main()